cd7012ac90457dc907e4678eb69bea1d17aaa22b,src/edu/stanford/nlp/trees/international/pennchinese/CTBErrorCorrectingTreeNormalizer.java,CTBErrorCorrectingTreeNormalizer,normalizeWholeTree,#Tree#TreeFactory#,126

Before Change


    // and presumably should be "NN"
    // a couple of other random errors are corrected here
    for (Tree subtree : newTree) {
      // Repair for a malformed tree shape: a "ROOT" node whose first child is a
      // leaf valued "CP". The condition is evaluated left to right, so
      // firstChild() is only reached for nodes whose value is "ROOT".
      if (subtree.value().equals("ROOT") && subtree.firstChild().isLeaf() && "CP".equals(subtree.firstChild().value())) {
        // Log the whole tree (newTree), not just the offending subtree, before mutating it.
        EncodingPrintWriter.err.println("Correcting error: seriously messed up tree in CTB6: " + newTree, ChineseTreebankLanguagePack.ENCODING);
        List<Tree> children = subtree.getChildrenAsList();
        // NOTE(review): assuming java.util.List semantics (end index exclusive),
        // subList(1, size - 1) drops the LAST child as well as the first one.
        // Confirm that discarding the final child is intended.
        children = children.subList(1,children.size() - 1);
        // Replace this node's children with the trimmed list.
        subtree.setChildren(children);
      }
      if (subtree.isPreTerminal()) {

After Change


    // there are also several places where "NP" is used as a preterminal tag
    // and presumably should be "NN"
    // a couple of other random errors are corrected here
    for (Tree subtree : newTree) {
      // Repair for a malformed tree shape: a "CP" node whose only child is a
      // "ROOT" node, where that ROOT node's first child is a leaf valued "CP".
      if (subtree.value().equals("CP") && subtree.numChildren() == 1) {
        Tree subsubtree = subtree.firstChild();
        if (subsubtree.value().equals("ROOT")) {
          // Constant-first equals: a null value on the leaf yields false rather than throwing.
          if (subsubtree.firstChild().isLeaf() && "CP".equals(subsubtree.firstChild().value())) {
            // Log the whole tree (newTree) as it looks before the repair.
            EncodingPrintWriter.err.println("Correcting error: seriously messed up tree in CTB6: " + newTree, ChineseTreebankLanguagePack.ENCODING);
            List<Tree> children = subsubtree.getChildrenAsList();
            // Keep every child of the inner ROOT except the first (the stray "CP" leaf).
            // NOTE(review): assumes java.util.List.subList semantics (end index exclusive) -- confirm.
            children = children.subList(1,children.size());
            // Attach the remaining children directly to the outer CP node; the inner
            // ROOT node is no longer among its children afterwards.
            // NOTE(review): this mutates a node while newTree is being iterated;
            // whether the iterator tolerates that is not visible here -- verify.
            subtree.setChildren(children);
            EncodingPrintWriter.err.println("  Corrected as:                                     " + newTree, ChineseTreebankLanguagePack.ENCODING); // spaced to align with above
          }
        }
      }